head(readstate)
multi_cluster <- readstate %>%
group_by(state,months) %>%
summarise(MaximumTemperature= max(MaximumTemperature),
MinimumTemperature= min(MinimumTemperature),
Precipitation = mean(Precipitation),
AverageTemperature = mean(AverageTemperature),
PalmerDroughtSeverityIndexPDSI = mean(PalmerDroughtSeverityIndexPDSI)
)
`summarise()` has grouped output by 'state'. You can override using the
`.groups` argument.
multi_cluster <- multi_cluster %>%
group_by(state) %>%
summarise(max(MaximumTemperature), min(MinimumTemperature), mean(Precipitation)
, mean(AverageTemperature), mean(PalmerDroughtSeverityIndexPDSI))
multi_cluster[multi_cluster$state=="Alaska",6] <- 0
which(is.na(multi_cluster))
integer(0)
dataset.e <- dist(multi_cluster, method = 'euclidean') #no method -> euclidean (default)
Warning in dist(multi_cluster, method = "euclidean") :
NAs introduced by coercion
dataset.es <- hclust(dataset.e, method = 'single')
dataset.ea <- hclust(dataset.e, method = 'average')
dataset.ec <- hclust(dataset.e, method = 'complete')
dataset.ew <- hclust(dataset.e, method = 'ward.D2')
#plot of dendograms
par(mfrow=c(1,3))
plot(dataset.es, main='euclidean-single', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ec, main='euclidean-complete', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ea, main='euclidean-average', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ew, main='euclidean-ward', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
#cutting dendograms: (with k=2 fixed)
cluster.ec <- cutree(dataset.ec, k=3) # euclidean-complete:
cluster.ea <- cutree(dataset.ea, k=3) # euclidean-average:
cluster.es <- cutree(dataset.es, k=3) # euclidean-simple:
cluster.ew <- cutree(dataset.ew, k=3) # euclidean-simple:
# Let's give a mark to the algorithms: did they aggregate coherently with
# the dissimilarity matrix or not?
# compute the cophenetic matrices
coph.es <- cophenetic(dataset.es)
coph.ec <- cophenetic(dataset.ec)
coph.ea <- cophenetic(dataset.ea)
coph.ew <- cophenetic(dataset.ew)
# compute cophenetic coefficients (<-> corr(D,C)) D:dist mat; C:coph dist mat
es <- cor(dataset.e, coph.es)
ec <- cor(dataset.e, coph.ec)
ea <- cor(dataset.e, coph.ea)
ew <- cor(dataset.e, coph.ew)
c("Eucl-Single"=es,"Eucl-Compl."=ec,"Eucl-Ave."=ea, "Eucl-War."=ew)
Eucl-Single Eucl-Compl. Eucl-Ave. Eucl-War.
0.5224238 0.6759758 0.7415526 0.6729451
Ha più senso utilizzare Euclidean-Average con k=3
library(plotly)
us_data <- map_data("state")
df <- data.frame(
state = tolower(multi_cluster$state),
values = cluster.ew
)
library(usmap)
plot_usmap(data = df) + labs(title = "Multivariate cluster (all cov)")
#Now standardize data
std_multi_cluster<- scale(multi_cluster[,-1])
dataset.e <- dist(std_multi_cluster, method = 'euclidean') #no method -> euclidean (default)
dataset.es <- hclust(dataset.e, method = 'single')
dataset.ea <- hclust(dataset.e, method = 'average')
dataset.ec <- hclust(dataset.e, method = 'complete')
dataset.ew <- hclust(dataset.e, method = 'ward.D2')
#plot of dendograms
par(mfrow=c(1,3))
plot(dataset.es, main='euclidean-single', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ec, main='euclidean-complete', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ea, main='euclidean-average', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ew, main='euclidean-ward', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
#cutting dendograms: (with k=2 fixed)
cluster.ec <- cutree(dataset.ec, k=3)
cluster.ea <- cutree(dataset.ea, k=4) # va bene 3 o 4
cluster.es <- cutree(dataset.es, k=3)
cluster.ew <- cutree(dataset.ew, k=4) #va bene 3 o 4
# Let's give a mark to the algorithms: did they aggregate coherently with
# the dissimilarity matrix or not?
# compute the cophenetic matrices
coph.es <- cophenetic(dataset.es)
coph.ec <- cophenetic(dataset.ec)
coph.ea <- cophenetic(dataset.ea)
coph.ew <- cophenetic(dataset.ew)
# compute cophenetic coefficients (<-> corr(D,C)) D:dist mat; C:coph dist mat
es <- cor(dataset.e, coph.es)
ec <- cor(dataset.e, coph.ec)
ea <- cor(dataset.e, coph.ea)
ew <- cor(dataset.e, coph.ew)
c("Eucl-Single"=es,"Eucl-Compl."=ec,"Eucl-Ave."=ea, "Eucl-War."=ew)
Eucl-Single Eucl-Compl. Eucl-Ave. Eucl-War.
0.6191115 0.6911089 0.7474144 0.5645727
Anche in questo caso E-A ma k=2 migliore (ignoreremo ciò e usiamo k=3)
cluster_index
state temp_min temp_max temp_avg prec pdsi multi_all_ward
[1,] "Alabama" "1" "1" "3" "3" "1" "1"
[2,] "Alaska" "2" "3" "1" "1" NA "2"
[3,] "Arizona" "1" "1" "3" "2" "3" "3"
[4,] "Arkansas" "1" "1" "3" "1" "2" "1"
[5,] "California" "1" "1" "3" "1" "3" "3"
[6,] "Colorado" "3" "1" "2" "1" "3" "2"
[7,] "Connecticut" "1" "1" "2" "1" "1" "4"
[8,] "Delaware" "1" "1" "2" "3" "1" "4"
[9,] "Florida" "1" "1" "3" "1" "3" "1"
[10,] "Georgia" "1" "1" "3" "3" "1" "1"
[11,] "Idaho" "2" "2" "1" "1" "3" "2"
[12,] "Illinois" "1" "1" "2" "1" "2" "4"
[13,] "Indiana" "1" "1" "2" "1" "2" "4"
[14,] "Iowa" "3" "2" "1" "1" "2" "2"
[15,] "Kansas" "1" "1" "2" "1" "2" "1"
[16,] "Kentucky" "1" "1" "2" "1" "2" "4"
[17,] "Louisiana" "1" "1" "3" "1" "2" "1"
[18,] "Maine" "3" "3" "1" "1" "1" "2"
[19,] "Maryland" "1" "1" "2" "3" "2" "4"
[20,] "Massachusetts" "1" "1" "2" "1" "1" "4"
[21,] "Michigan" "3" "3" "1" "1" "2" "2"
[22,] "Minnesota" "3" "2" "1" "1" "2" "2"
[23,] "Mississippi" "1" "1" "3" "1" "2" "1"
[24,] "Missouri" "1" "1" "2" "1" "2" "4"
[25,] "Montana" "2" "2" "1" "1" "3" "2"
[26,] "Nebraska" "3" "2" "2" "1" "2" "2"
[27,] "Nevada" "1" "1" "2" "1" "3" "3"
[28,] "New Hampshire" "3" "3" "1" "1" "1" "2"
[29,] "New Jersey" "1" "1" "2" "1" "1" "4"
[30,] "New Mexico" "1" "1" "2" "2" "3" "3"
[31,] "New York" "3" "3" "1" "1" "1" "2"
[32,] "North Carolina" "1" "1" "3" "3" "1" "1"
[33,] "North Dakota" "2" "2" "1" "1" "1" "2"
[34,] "Ohio" "1" "1" "2" "1" "2" "4"
[35,] "Oklahoma" "1" "1" "3" "1" "2" "1"
[36,] "Oregon" "1" "1" "2" "1" "3" "4"
[37,] "Pennsylvania" "1" "1" "2" "1" "2" "4"
[38,] "Rhode Island" "1" "1" "2" "1" "1" "4"
[39,] "South Carolina" "1" "1" "3" "3" "1" "1"
[40,] "South Dakota" "3" "2" "1" "1" "2" "2"
[41,] "Tennessee" "1" "1" "3" "1" "2" "4"
[42,] "Texas" "1" "1" "3" "1" "3" "1"
[43,] "Utah" "3" "1" "2" "1" "3" "3"
[44,] "Vermont" "3" "3" "1" "1" "1" "2"
[45,] "Virginia" "1" "1" "2" "3" "2" "4"
[46,] "Washington" "1" "2" "2" "1" "3" "4"
[47,] "West Virginia" "1" "1" "2" "3" "2" "4"
[48,] "Wisconsin" "3" "3" "1" "1" "2" "2"
[49,] "Wyoming" "2" "2" "1" "3" "3" "2"
std_multi_cluster<- scale(multi_cluster[,-c(1,6)])
dataset.e <- dist(std_multi_cluster, method = 'euclidean') #no method -> euclidean (default)
dataset.es <- hclust(dataset.e, method = 'single')
dataset.ea <- hclust(dataset.e, method = 'average')
dataset.ec <- hclust(dataset.e, method = 'complete')
dataset.ew <- hclust(dataset.e, method = 'ward.D2')
#plot of dendograms
par(mfrow=c(1,3))
plot(dataset.es, main='euclidean-single', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ec, main='euclidean-complete', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ea, main='euclidean-average', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
plot(dataset.ew, main='euclidean-ward', hang=-0.1, xlab='', labels=F, cex=0.6, sub='')
#cutting dendograms: (with k=2 fixed)
cluster.ec <- cutree(dataset.ec, k=3) # euclidean-complete:
cluster.ea <- cutree(dataset.ea, k=3) # euclidean-average:
cluster.es <- cutree(dataset.es, k=3) # euclidean-simple:
cluster.ew <- cutree(dataset.ew, k=4) # euclidean-simple:
# Let's give a mark to the algorithms: did they aggregate coherently with
# the dissimilarity matrix or not?
# compute the cophenetic matrices
coph.es <- cophenetic(dataset.es)
coph.ec <- cophenetic(dataset.ec)
coph.ea <- cophenetic(dataset.ea)
coph.ew <- cophenetic(dataset.ew)
# compute cophenetic coefficients (<-> corr(D,C)) D:dist mat; C:coph dist mat
es <- cor(dataset.e, coph.es)
ec <- cor(dataset.e, coph.ec)
ea <- cor(dataset.e, coph.ea)
ew <- cor(dataset.e, coph.ew)
c("Eucl-Single"=es,"Eucl-Compl."=ec,"Eucl-Ave."=ea, "Eucl-War."=ew)
Eucl-Single Eucl-Compl. Eucl-Ave. Eucl-War.
0.6094244 0.5573119 0.7680538 0.5700928
cluster_index
state temp_min temp_max temp_avg prec pdsi multi_all_ward
[1,] "Alabama" "1" "1" "3" "3" "1" "1"
[2,] "Alaska" "2" "3" "1" "1" NA "2"
[3,] "Arizona" "1" "1" "3" "2" "3" "3"
[4,] "Arkansas" "1" "1" "3" "1" "2" "1"
[5,] "California" "1" "1" "3" "1" "3" "3"
[6,] "Colorado" "3" "1" "2" "1" "3" "2"
[7,] "Connecticut" "1" "1" "2" "1" "1" "4"
[8,] "Delaware" "1" "1" "2" "3" "1" "4"
[9,] "Florida" "1" "1" "3" "1" "3" "1"
[10,] "Georgia" "1" "1" "3" "3" "1" "1"
[11,] "Idaho" "2" "2" "1" "1" "3" "2"
[12,] "Illinois" "1" "1" "2" "1" "2" "4"
[13,] "Indiana" "1" "1" "2" "1" "2" "4"
[14,] "Iowa" "3" "2" "1" "1" "2" "2"
[15,] "Kansas" "1" "1" "2" "1" "2" "1"
[16,] "Kentucky" "1" "1" "2" "1" "2" "4"
[17,] "Louisiana" "1" "1" "3" "1" "2" "1"
[18,] "Maine" "3" "3" "1" "1" "1" "2"
[19,] "Maryland" "1" "1" "2" "3" "2" "4"
[20,] "Massachusetts" "1" "1" "2" "1" "1" "4"
[21,] "Michigan" "3" "3" "1" "1" "2" "2"
[22,] "Minnesota" "3" "2" "1" "1" "2" "2"
[23,] "Mississippi" "1" "1" "3" "1" "2" "1"
[24,] "Missouri" "1" "1" "2" "1" "2" "4"
[25,] "Montana" "2" "2" "1" "1" "3" "2"
[26,] "Nebraska" "3" "2" "2" "1" "2" "2"
[27,] "Nevada" "1" "1" "2" "1" "3" "3"
[28,] "New Hampshire" "3" "3" "1" "1" "1" "2"
[29,] "New Jersey" "1" "1" "2" "1" "1" "4"
[30,] "New Mexico" "1" "1" "2" "2" "3" "3"
[31,] "New York" "3" "3" "1" "1" "1" "2"
[32,] "North Carolina" "1" "1" "3" "3" "1" "1"
[33,] "North Dakota" "2" "2" "1" "1" "1" "2"
[34,] "Ohio" "1" "1" "2" "1" "2" "4"
[35,] "Oklahoma" "1" "1" "3" "1" "2" "1"
[36,] "Oregon" "1" "1" "2" "1" "3" "4"
[37,] "Pennsylvania" "1" "1" "2" "1" "2" "4"
[38,] "Rhode Island" "1" "1" "2" "1" "1" "4"
[39,] "South Carolina" "1" "1" "3" "3" "1" "1"
[40,] "South Dakota" "3" "2" "1" "1" "2" "2"
[41,] "Tennessee" "1" "1" "3" "1" "2" "4"
[42,] "Texas" "1" "1" "3" "1" "3" "1"
[43,] "Utah" "3" "1" "2" "1" "3" "3"
[44,] "Vermont" "3" "3" "1" "1" "1" "2"
[45,] "Virginia" "1" "1" "2" "3" "2" "4"
[46,] "Washington" "1" "2" "2" "1" "3" "4"
[47,] "West Virginia" "1" "1" "2" "3" "2" "4"
[48,] "Wisconsin" "3" "3" "1" "1" "2" "2"
[49,] "Wyoming" "2" "2" "1" "3" "3" "2"
multi_all_avg multi_nopdsi_ward multi_nopdsi_avg
[1,] "1" "1" "1"
[2,] "2" "2" "2"
[3,] "3" "3" "1"
[4,] "1" "4" "3"
[5,] "3" "3" "1"
[6,] "3" "2" "3"
[7,] "4" "4" "3"
[8,] "4" "4" "3"
[9,] "1" "1" "1"
[10,] "1" "1" "1"
[11,] "3" "2" "3"
[12,] "4" "4" "3"
[13,] "4" "4" "3"
[14,] "4" "2" "3"
[15,] "4" "4" "3"
[16,] "4" "4" "3"
[17,] "1" "1" "1"
[18,] "4" "2" "3"
[19,] "4" "4" "3"
[20,] "4" "4" "3"
[21,] "4" "2" "3"
[22,] "4" "2" "3"
[23,] "1" "1" "1"
[24,] "4" "4" "3"
[25,] "3" "2" "3"
[26,] "4" "2" "3"
[27,] "3" "3" "3"
[28,] "4" "2" "3"
[29,] "4" "4" "3"
[30,] "3" "3" "3"
[31,] "4" "2" "3"
[32,] "1" "4" "3"
[33,] "3" "2" "3"
[34,] "4" "4" "3"
[35,] "4" "4" "1"
[36,] "3" "4" "3"
[37,] "4" "4" "3"
[38,] "4" "4" "3"
[39,] "1" "1" "1"
[40,] "4" "2" "3"
[41,] "4" "4" "3"
[42,] "1" "3" "1"
[43,] "3" "3" "3"
[44,] "4" "2" "3"
[45,] "4" "4" "3"
[46,] "4" "4" "3"
[47,] "4" "4" "3"
[48,] "4" "2" "3"
[49,] "3" "2" "3"
plot_ly(x=std_multi_cluster[,1], y=std_multi_cluster[,2], z=std_multi_cluster[,3], color=cluster.ea, pch=19)
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
No scatter3d mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
Warning: 'scatter3d' objects don't have these attributes: 'pch'
Valid attributes include:
'connectgaps', 'customdata', 'customdatasrc', 'error_x', 'error_y', 'error_z', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'projection', 'scene', 'showlegend', 'stream', 'surfaceaxis', 'surfacecolor', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
No scatter3d mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
Warning: 'scatter3d' objects don't have these attributes: 'pch'
Valid attributes include:
'connectgaps', 'customdata', 'customdatasrc', 'error_x', 'error_y', 'error_z', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'marker', 'meta', 'metasrc', 'mode', 'name', 'opacity', 'projection', 'scene', 'showlegend', 'stream', 'surfaceaxis', 'surfacecolor', 'text', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'